<?php
namespace imcat;
// Load the project bootstrap relative to this file rather than the process
// working directory, matching how the sample file below is located.
require __DIR__.'/../incs/func.php';
// Sample markup read from celm.htm next to this script; the 'elm' demo parses it.
// (The 'pq2' demo assigns its own inline HTML to $data instead.)
$data = comFiles::get(__DIR__.'/celm.htm');
// Demo selector compared against 'pq1', 'pq2' and 'elm' further down; 'elm' is
// the default handed to req(). NOTE(review): req() is a project helper that
// presumably reads the 'act' request parameter - confirm in func.php.
$act = req('act', 'elm');
?>
<!DOCTYPE html><html><head>
<?php glbHtml::page('init'); ?>
</head>
<body>
<nav>
<a href="?act=pq1">extQuery采集1</a>
<a href="?act=pq2">extQuery采集2</a>
<a href="?act=elm">元素分离</a>
</nav>
<?php if($act=='pq2'){ ?>
<h3>采集页面2</h3>
<?php
// Demo "pq2": select list items from remote pages with extQuery::pqa() and
// dump one row per item, then run the same helper on an inline HTML string
// and on a document object created by extQuery::newDocumentFile().
// NOTE(review): the 30 in [$url, 30] is passed straight to extQuery::pqa();
// it looks like a timeout or cache lifetime - confirm in extQuery.
// NOTE(review): text scraped from remote pages is echoed without HTML
// escaping further down; acceptable for a local demo only.

// --- First page ---
$url2 = 'https://dg.haofang.net/ershoufang/p2.html';
$lists = extQuery::pqa([$url2,30],'.list-content li.block');
foreach($lists as $li) {
    $row = []; // fresh row per item so no field survives from an earlier item
    $tmp = pq($li)->find('div.title a');
    $row['url'] = pq($tmp)->attr('href');
    $row['title'] = pq($tmp)->text();
    $row['thumb'] = pq($li)->find('img:first')->attr('src');
    $row['area'] = pq($li)->find('.address')->text();
    $row['price'] = pq($li)->find('.total-price')->find('span')->text();
    $row['punit'] = pq($li)->find('.unit-price')->text();
    dump($row);
}

// --- Second page (timed) ---
// $timer1..$timer2 spans the pqa() call, $timer2..$timer3 spans the row loop.
$timer1 = microtime(1);
$url2 = 'http://hezhou.loupan.com/xinfang/p1/';
$lists = extQuery::pqa([$url2,30],'.list-house li.item');
$timer2 = microtime(1);
foreach($lists as $li) {
    // Reset here as well: without it the 'punit' key written by the first
    // loop would show up in every row dumped for this page.
    $row = [];
    $row['url'] = pq($li)->find('a:first')->attr('href');
    $img = pq($li)->find('img:first');
    $thumb = pq($img)->attr('data-src');
    // strpos() returns 0 for a match at the very start, so compare against
    // false explicitly; the cast avoids passing null when the attribute is absent.
    $row['thumb'] = strpos((string)$thumb,'images/nopic.') !== false ? '' : $thumb;
    $row['title'] = pq($img)->attr('alt');
    $row['area'] = pq($li)->find('.address')->find('span')->text();
    $row['price'] = pq($li)->find('.price')->text();
    dump($row);
}
$timer3 = microtime(1);
$tmp = $timer2-$timer1; dump($tmp);
$tmp = $timer3-$timer2; dump($tmp);

// --- Inline HTML string as the source ---
// NOTE(review): the meaning of the third argument (0) is defined by
// extQuery::pqa() and is not visible here.
$data = '
<body>
<div class="topCity-content">
<a href="http://gz.fzg360.com">广州</a>
<a href="http://sz.fzg360.com">深圳</a>
<a href="http://dg.fzg360.com">东莞</a>
</div>
</body>
';
$lists = extQuery::pqa($data,'a',0);
foreach($lists as $li) {
    echo pq($li)->text()."<br>\n";
}
echo '-end-1';

// --- Document object as the source ---
$doc = extQuery::newDocumentFile('http://m.gz.fzg360.com/index/citylist.html');
$lists = extQuery::pqa($doc,'span');
foreach($lists as $li) {
    echo pq($li)->text()."<br>\n";
}
?>
<?php } if($act=='pq1'){ ?>
<h3>采集页面1</h3>
<?php
// Demo "pq1": load two remote pages as separate documents and query each one
// by passing its document ID as the second argument to pq().
$url1 = 'http://hezhou.loupan.com/xinfang/p1/';
$url2 = 'http://hezhou.loupan.com/xinfang/p2/';
$doc1 = extQuery::newDocumentFile($url1);
$doc2 = extQuery::newDocumentFile($url2);
// NOTE(review): a failed download is not handled here; what newDocumentFile()
// returns in that case is not visible from this file.
$did1 = $doc1->getDocumentID(); echo "$did1<br>\n";
$did2 = $doc2->getDocumentID(); echo "$did2<br>\n";
echo "<pre>\n";
echo "<hr>\n";

// Document 1: collect one row per list item and print the whole set.
$lists = [];
$lis1 = pq('.list-house li.item', $did1);
foreach($lis1 as $li) {
    $row = []; // fresh row per item
    $row['url'] = pq($li)->find('a:first')->attr('href');
    $img = pq($li)->find('img:first');
    $thumb = pq($img)->attr('data-src');
    // strpos() returns 0 for a match at the very start, so compare against
    // false explicitly; the cast avoids passing null when the attribute is absent.
    $row['thumb'] = strpos((string)$thumb,'images/nopic.') !== false ? '' : $thumb;
    $row['title'] = pq($img)->attr('alt');
    $row['area'] = pq($li)->find('.address')->text();
    $row['price'] = pq($li)->find('.price')->text();
    $lists[] = $row;
}
print_r($lists);
echo "<hr>\n";

// Document 2: print only the <h2> text of each list item.
$lis2 = pq('.list-house li.item', $did2);
foreach($lis2 as $li) {
    $tmp = pq($li)->find('h2')->text();
    echo "$tmp<br>\n";
}
echo "<hr>\n";
// Close the <pre> opened above so the page markup stays balanced.
echo "</pre>\n";
?>
<?php } if($act=='elm'){ ?>
<h3>分离html元素</h3>
<li>采集目标页:<a href="./celm.htm" target='_celm'>celm.htm</a></li>
<?php
// Demo "elm": run each basElm extraction helper against the markup in $data
// and print the result under a labelled <hr> separator.

// Prints a scalar result: blank lines, "<hr>label:", the value, then a tail marker.
$showText = function ($label, $value, $tail = "\n") {
    echo "\n\n<hr>" . $label . ":\n" . $value . $tail;
};
// Prints an array result through print_r() under the same kind of heading.
$showList = function ($label, $items) {
    echo "\n\n<hr>" . $label . ":\n";
    print_r($items);
    echo "\n";
};

// <title> lookups.
$showText('title-val', basElm::getVal($data, 'title'));
$showText('title-pos', basElm::getPos($data, 'title'));

// Content between two markers; "(*)" stands for the part to capture.
$showText('val', basElm::getVal($data, 'id="link"(*)id="test"', '->'), ">>>\n");
$showText('pos', basElm::getPos($data, 'id="link"(*)id="test"'), ">>>\n");
$showText('val2', basElm::getVal($data, '<div class="content">(*)</div>'), ">>>\n");
$showText('pos2', basElm::getPos($data, '<div class="content">(*)id="link"'), ">>>\n");
$showText('pos3', basElm::getPos($data, '<div class="content">(*)</div>'), ">>>\n");
$showText('pos4', basElm::getPos($data, 'id="xnon15"(*)id="xnon32"'), ">>>\n");

// Repeated matches returned as arrays.
$showList('getArr', basElm::getArr($data, '<li class(*)</li>'));
$showList('getPreg', basElm::getPreg($data, '<li class="cls1">(*)</li>'));

// Attribute extraction.
$showList('getArr-a', basElm::getAttr($data, 'target', 'key'));
$showText('getAttr-no', basElm::getAttr($data, 'target', 'key', 1));
$showList('getArr-a', basElm::getAttr($data, 'noattr', 'key'));
$showText('getAttr-witdh', basElm::getAttr($data, 'witdh', 'key', 0));
$showList('getArr-urls', basElm::getAttr($data, 'href', 'url'));
}
?>
</body>
</html>
-End-